Purpose

This document summarizes Rick Gilmore’s analysis of participant sorting data using graph and network analysis tools.

Set-up

Import data

Jaccard indices

The Jaccard index data are found in analysis/data/jaccard.csv.

jaccard_raw <- readr::read_csv("analysis/data/jaccard.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Exemplar.Row = col_double(),
##   Exemplar.Col = col_double(),
##   Jaccard = col_double(),
##   Group = col_character()
## )
str(jaccard_raw)
## spec_tbl_df [950 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Exemplar.Row: num [1:950] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Exemplar.Col: num [1:950] 2 2 2 2 2 3 3 3 3 3 ...
##  $ Jaccard     : num [1:950] 0.0476 0.1186 0.1228 0.2 0.2692 ...
##  $ Group       : chr [1:950] "P31M" "P3M1" "P6M" "P6" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Exemplar.Row = col_double(),
##   ..   Exemplar.Col = col_double(),
##   ..   Jaccard = col_double(),
##   ..   Group = col_character()
##   .. )

It’s probably wise to reorder the data frame by wallpaper group, Jaccard index, and exemplar index.

jaccard <- jaccard_raw %>%
  dplyr::arrange(., Group, Exemplar.Row, desc(Jaccard))

Let’s add a Jaccard mean and median by Exemplar.Row.

jaccard_aug <- jaccard %>%
  dplyr::group_by(., Group, Exemplar.Row) %>%
  dplyr::mutate(.,
    j_mean = mean(Jaccard),
    j_med = median(Jaccard),
    j_max = max(Jaccard),
    j_min = min(Jaccard)
  )

Make edge, node tibbles

p1 <- jaccard %>%
  dplyr::filter(., Group == "P1")

p1_edges <- tibble(from = p1$Exemplar.Row,
                   to = p1$Exemplar.Col,
                   weight = p1$Jaccard)
p1_nodes <- tibble(id = 1:20)

Make network

p1_network <- network::network(p1_edges, vertex.attr = p1_nodes, 
                      matrix.type = "edgelist", ignore.eval = FALSE,
                      directed = FALSE)

Plotting

plot(p1_network, vertex.cex = 3, mode='circle')

Let’s pick the top ten strongest connections.

p1_tidy <- tidygraph::tbl_graph(nodes = p1_nodes, edges = p1_edges,
                                directed = FALSE)

ggraph::ggraph(p1_tidy) + geom_edge_link() + geom_node_point() + theme_graph()
## Using `stress` as default layout

ggraph(p1_tidy, layout = "graphopt") + 
  geom_node_point() +
  geom_edge_link(aes(width = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id), repel = TRUE) +
  labs(edge_width = "Jaccard") +
  theme_graph()

ggraph(p1_tidy, layout = "linear") + 
  geom_edge_arc(aes(width = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

Let’s pick the top two exemplars to plot.

p1_e8 <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 8)

ggraph(p1_e8, layout = "linear") + 
  geom_edge_arc(aes(width = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_e10 <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 10 | to == 10)

ggraph(p1_e10, layout = "linear") + 
  geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_e10 <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 10 | to == 10)

ggraph(p1_e10, layout = "graphopt") + 
  geom_edge_link(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 8 | to == 8)

ggraph(p1_selected, layout = "graphopt") + 
  geom_edge_link(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 8 | to == 8)

ggraph(p1_selected, layout = "linear") + 
  geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 10 | to == 10)

ggraph(p1_selected, layout = "linear") + 
  geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

jaccard_aug %>% 
  dplyr::filter(., Group == "P1") %>%
  dplyr::arrange(., desc(j_mean))
## # A tibble: 190 x 8
## # Groups:   Group, Exemplar.Row [19]
##    Exemplar.Row Exemplar.Col Jaccard Group j_mean j_med j_max  j_min
##           <dbl>        <dbl>   <dbl> <chr>  <dbl> <dbl> <dbl>  <dbl>
##  1           19           20  0.32   P1     0.32  0.32  0.32  0.32  
##  2           16           20  0.375  P1     0.244 0.26  0.375 0.0820
##  3           16           17  0.32   P1     0.244 0.26  0.375 0.0820
##  4           16           19  0.2    P1     0.244 0.26  0.375 0.0820
##  5           16           18  0.0820 P1     0.244 0.26  0.375 0.0820
##  6           10           16  0.404  P1     0.237 0.222 0.404 0.0820
##  7           10           15  0.347  P1     0.237 0.222 0.404 0.0820
##  8           10           20  0.347  P1     0.237 0.222 0.404 0.0820
##  9           10           19  0.269  P1     0.237 0.222 0.404 0.0820
## 10           10           12  0.222  P1     0.237 0.222 0.404 0.0820
## # … with 180 more rows

It looks like exemplars 19 and 16 are are among the highest.

jaccard_aug %>% 
  dplyr::filter(., Group == "P1") %>%
  dplyr::arrange(., j_mean)
## # A tibble: 190 x 8
## # Groups:   Group, Exemplar.Row [19]
##    Exemplar.Row Exemplar.Col Jaccard Group j_mean j_med j_max  j_min
##           <dbl>        <dbl>   <dbl> <chr>  <dbl> <dbl> <dbl>  <dbl>
##  1           18           19   0.1   P1     0.1   0.1   0.1   0.1   
##  2           18           20   0.1   P1     0.1   0.1   0.1   0.1   
##  3           11           13   0.226 P1     0.168 0.182 0.226 0.0833
##  4           11           17   0.226 P1     0.168 0.182 0.226 0.0833
##  5           11           14   0.204 P1     0.168 0.182 0.226 0.0833
##  6           11           15   0.182 P1     0.168 0.182 0.226 0.0833
##  7           11           18   0.182 P1     0.168 0.182 0.226 0.0833
##  8           11           20   0.182 P1     0.168 0.182 0.226 0.0833
##  9           11           16   0.121 P1     0.168 0.182 0.226 0.0833
## 10           11           12   0.102 P1     0.168 0.182 0.226 0.0833
## # … with 180 more rows

18 and 11 among the lowest

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 18 | to == 18)

ggraph(p1_selected, layout = "linear") + 
  geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.2, 2)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 19 | to == 19)

g <- ggraph(p1_selected, layout = "linear") + 
  geom_edge_arc(aes(width = weight, color = weight), alpha = 0.8) + 
  scale_edge_width(range = c(0.1, 4), limits = c(0, .6)) +
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

g

p1_selected <- p1_tidy %>%
  activate(edges) %>%
  dplyr::filter(., from == 19 | to == 19)

p1_selected <- p1_selected %>%
  dplyr::mutate(weight = cut(weight, c(0, .1, .2, .3, .4, .5, .6)))

g <- ggraph(p1_selected, layout = "linear", circular = TRUE) + 
  geom_edge_arc(aes(color = factor(weight))) + 
  geom_node_text(aes(label = id)) +
  labs(edge_width = "Jaccard") +
  theme_graph()

g

plot_jaccard_vals <- function(df, exemplar_id, group, 
                              j_stat_type = "mean", 
                              j_stat_val = NA) {
  df <- df %>%
    activate(edges) %>%
    dplyr::filter(., from == exemplar_id | to == exemplar_id) %>%
    dplyr::mutate(weight = cut(weight, c(0, .2, .4, .6, .8), 
                               labels = c("<.2", ".2-.4", ".4-.6", ">.6")))
  
  f_stat <- format(j_stat_val, digits = 2, nsmall = 2)
  
  ggraph(df, layout = "linear", circular = TRUE) +
    geom_edge_arc(aes(color = weight)) +
    geom_node_text(aes(label = id)) +
    ggtitle(paste0(group, " | # ", exemplar_id, " | ", j_stat_type, " Jaccard ", f_stat)) +
    theme_graph() + 
    coord_fixed()
}

plot_jaccard_vals(p1_tidy, 11, "P1")

plot_jaccard_vals_2 <- function(df, exemplar_id, group, 
                              j_stat_type = "mean", 
                              j_stat_val = NA) {
  df <- df %>%
    activate(edges) %>%
    dplyr::filter(., from == exemplar_id | to == exemplar_id)
  
  f_stat <- format(j_stat_val, digits = 2, nsmall = 2)
  
  ggraph(df, layout = "linear", circular = TRUE) +
    geom_edge_arc(aes(color = weight)) +
    geom_node_text(aes(label = id)) +
    ggtitle(paste0(group, " | # ", exemplar_id, " | ", j_stat_type, " Jaccard ", f_stat)) +
    theme_graph() + 
    coord_fixed()
}

plot_jaccard_vals_2(p1_tidy, 11, "P1")

plot_jaccard_vals_3 <- function(df, exemplar_id, group, 
                              j_stat_type = "mean", 
                              j_stat_val = NA) {
  df <- df %>%
    activate(edges) %>%
    dplyr::filter(., from == exemplar_id | to == exemplar_id) %>%
    dplyr::mutate(weight = cut(weight, c(0, .2, .4, .6, .8), 
                               labels = c("<.2", ".2-.4", ".4-.6", ">.6")))
  
  f_stat <- format(j_stat_val, digits = 2, nsmall = 2)
  
  ggraph(df, layout = "linear", circular = TRUE) +
    geom_edge_arc(aes(linetype = factor(weight),
                      color = factor(weight))) +
    geom_node_text(aes(label = id)) +
    ggtitle(paste0(group, " | # ", exemplar_id, " | ", j_stat_type, " Jaccard ", f_stat)) +
    theme_graph() + 
    coord_fixed()
}

plot_jaccard_vals_3(p1_tidy, 11, "P1")

Define functions to select wallpaper group

wp_graph <- function(df, group) {
  out_df <- df %>%
    dplyr::filter(., Group == group)

  df_edges <- tibble(from = out_df$Exemplar.Row,
                     to = out_df$Exemplar.Col,
                     weight = out_df$Jaccard)
  
  df_nodes <- tibble(id = 1:20)
 
  tidygraph::tbl_graph(nodes = df_nodes, 
                       edges = df_edges,
                       directed = FALSE)
}
jaccard_stats <- function(jaccard) {
  jaccard %>%
    dplyr::mutate(., exemplar_pair = paste0(Exemplar.Row, "-", Exemplar.Col)) %>%
    dplyr::group_by(., Group) %>%
    dplyr::summarise(
      .,
      Jaccard_mean = mean(Jaccard),
      Jaccard_med = median(Jaccard),
      Jaccard_max = max(Jaccard),
      Jaccard_min = min(Jaccard),
      Exemplar.Row = Exemplar.Row,
      Jaccard = Jaccard,
      exemplar_pair = exemplar_pair
    )
}

Test jaccard_stats().

graph <- wp_graph(jaccard, "P31M")
j_stats <- jaccard_stats(jaccard_raw)
## `summarise()` has grouped output by 'Group'. You can override using the `.groups` argument.

Define some helper functions to pick extremes of mean(Jaccard), max(Jaccard), and min(Jaccard).

These no longer seem quite right, so I am setting eval=FALSE on this section of code.

pick_extreme_mean_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
  this_group <- j_stats %>%
    dplyr::filter(., Group == group)
  
  if (hi_lo == "hi") {
    this_group <- this_group %>%
      dplyr::arrange(., desc(Jaccard_mean))
  } else {
    this_group <- this_group %>%
      dplyr::arrange(., Jaccard_mean)
  }
  
  this_group$Exemplar.Row[1:n_exemplars]
}

pick_extreme_max_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
  this_group <- j_stats %>%
    dplyr::filter(., Group == group)
  
  if (hi_lo == "hi") {
    this_group <- this_group %>%
      dplyr::arrange(., desc(Jaccard_max))
  } else {
    this_group <- this_group %>%
      dplyr::arrange(., Jaccard_max)
  }
  
  this_group$Exemplar.Row[1:n_exemplars]
}

pick_extreme_min_exemplars <- function(j_stats, group, hi_lo = "hi", n_exemplars = 1) {
  this_group <- j_stats %>%
    dplyr::filter(., Group == group)
  
  if (hi_lo == "hi") {
    this_group <- this_group %>%
      dplyr::arrange(., desc(Jaccard_min))
  } else {
    this_group <- this_group %>%
      dplyr::arrange(., Jaccard_min)
  }
  
  this_group$Exemplar.Row[1:n_exemplars]
}

Test functions

jaccard <- jaccard_raw %>%
  dplyr::arrange(., Group, Exemplar.Row, desc(Jaccard))

j_stats <- jaccard_stats(jaccard_raw)
## `summarise()` has grouped output by 'Group'. You can override using the `.groups` argument.
this_group = "P31M"
this_graph <- wp_graph(jaccard, this_group)

exemplars_w_max_jaccard <- j_stats %>%
  dplyr::filter(., Group == this_group,
                Jaccard_max == Jaccard)

exemplars_w_min_jaccard <- j_stats %>%
  dplyr::filter(., Group == this_group,
                Jaccard_min == Jaccard)

plot_jaccard_vals_3(this_graph, 
                  as.character(exemplars_w_max_jaccard[1, 'Exemplar.Row']), 
                  "P31M", 
                  "max", 
                  as.numeric(exemplars_w_max_jaccard[1, 'Jaccard']))

plot_jaccard_vals_3(this_graph, 
                  as.character(exemplars_w_min_jaccard[1, 'Exemplar.Row']), 
                  "P31M", 
                  "min", 
                  as.numeric(exemplars_w_min_jaccard[1, 'Jaccard']))

# Jaccard heatmaps

Let’s try to visualize the Jaccard indices as a heatmap.

heatmap() requires a matrix, so we have to convert jaccard to a matrix.

Let’s pick one of the wp groups to make our lives easier.

p31m <- jaccard %>%
  dplyr::filter(., Group == "P31M")

Let’s see if we can assign values using Exemplar.Row and Exemplar.Col.

p31m_matrix <- matrix(nrow = 20, ncol = 20)

for (r in 1:190) {
  p31m_matrix[p31m$Exemplar.Row[r], p31m$Exemplar.Col[r]] <- p31m$Jaccard[r]
}

# # Add identity values
# for (r in 1:20) {
#   p31m_matrix[r, r] <- 1
# }

p31m_matrix
##       [,1]       [,2]     [,3]       [,4]       [,5]       [,6]       [,7]
##  [1,]   NA 0.04761905 0.137931 0.13793103 0.17857143 0.15789474 0.10000000
##  [2,]   NA         NA 0.031250 0.24528302 0.13793103 0.06451613 0.65000000
##  [3,]   NA         NA       NA 0.06451613 0.24528302 0.08196721 0.06451613
##  [4,]   NA         NA       NA         NA 0.06451613 0.13793103 0.15789474
##  [5,]   NA         NA       NA         NA         NA 0.11864407 0.20000000
##  [6,]   NA         NA       NA         NA         NA         NA 0.04761905
##  [7,]   NA         NA       NA         NA         NA         NA         NA
##  [8,]   NA         NA       NA         NA         NA         NA         NA
##  [9,]   NA         NA       NA         NA         NA         NA         NA
## [10,]   NA         NA       NA         NA         NA         NA         NA
## [11,]   NA         NA       NA         NA         NA         NA         NA
## [12,]   NA         NA       NA         NA         NA         NA         NA
## [13,]   NA         NA       NA         NA         NA         NA         NA
## [14,]   NA         NA       NA         NA         NA         NA         NA
## [15,]   NA         NA       NA         NA         NA         NA         NA
## [16,]   NA         NA       NA         NA         NA         NA         NA
## [17,]   NA         NA       NA         NA         NA         NA         NA
## [18,]   NA         NA       NA         NA         NA         NA         NA
## [19,]   NA         NA       NA         NA         NA         NA         NA
## [20,]   NA         NA       NA         NA         NA         NA         NA
##             [,8]       [,9]      [,10]      [,11]     [,12]      [,13]
##  [1,] 0.24528302 0.22222222 0.11864407 0.17857143 0.1379310 0.13793103
##  [2,] 0.03125000 0.24528302 0.08196721 0.03125000 0.1578947 0.06451613
##  [3,] 0.20000000 0.11864407 0.24528302 0.15789474 0.1186441 0.17857143
##  [4,] 0.11864407 0.46666667 0.17857143 0.17857143 0.1186441 0.15789474
##  [5,] 0.15789474 0.08196721 0.11864407 0.11864407 0.2222222 0.06451613
##  [6,] 0.20000000 0.04761905 0.13793103 0.24528302 0.1785714 0.22222222
##  [7,] 0.08196721 0.15789474 0.10000000 0.01538462 0.1785714 0.10000000
##  [8,]         NA 0.06451613 0.15789474 0.10000000 0.2452830 0.17857143
##  [9,]         NA         NA 0.20000000 0.13793103 0.1000000 0.10000000
## [10,]         NA         NA         NA 0.24528302 0.1000000 0.29411765
## [11,]         NA         NA         NA         NA 0.1000000 0.17857143
## [12,]         NA         NA         NA         NA        NA 0.11864407
## [13,]         NA         NA         NA         NA        NA         NA
## [14,]         NA         NA         NA         NA        NA         NA
## [15,]         NA         NA         NA         NA        NA         NA
## [16,]         NA         NA         NA         NA        NA         NA
## [17,]         NA         NA         NA         NA        NA         NA
## [18,]         NA         NA         NA         NA        NA         NA
## [19,]         NA         NA         NA         NA        NA         NA
## [20,]         NA         NA         NA         NA        NA         NA
##            [,14]      [,15]      [,16]      [,17]      [,18]      [,19]
##  [1,] 0.06451613 0.22222222 0.11864407 0.20000000 0.08196721 0.20000000
##  [2,] 0.43478261 0.03125000 0.08196721 0.22222222 0.10000000 0.06451613
##  [3,] 0.04761905 0.15789474 0.17857143 0.11864407 0.17857143 0.22222222
##  [4,] 0.29411765 0.11864407 0.13793103 0.11864407 0.20000000 0.11864407
##  [5,] 0.06451613 0.11864407 0.10000000 0.10000000 0.24528302 0.04761905
##  [6,] 0.10000000 0.26923077 0.34693878 0.17857143 0.20000000 0.32000000
##  [7,] 0.34693878 0.04761905 0.06451613 0.17857143 0.10000000 0.08196721
##  [8,] 0.06451613 0.34693878 0.20000000 0.08196721 0.15789474 0.26923077
##  [9,] 0.34693878 0.08196721 0.10000000 0.15789474 0.15789474 0.15789474
## [10,] 0.10000000 0.24528302 0.26923077 0.15789474 0.11864407 0.15789474
## [11,] 0.10000000 0.26923077 0.29411765 0.15789474 0.11864407 0.13793103
## [12,] 0.06451613 0.22222222 0.08196721 0.06451613 0.13793103 0.10000000
## [13,] 0.11864407 0.13793103 0.26923077 0.17857143 0.29411765 0.26923077
## [14,]         NA 0.08196721 0.11864407 0.15789474 0.15789474 0.11864407
## [15,]         NA         NA 0.15789474 0.11864407 0.11864407 0.29411765
## [16,]         NA         NA         NA 0.26923077 0.20000000 0.22222222
## [17,]         NA         NA         NA         NA 0.08196721 0.15789474
## [18,]         NA         NA         NA         NA         NA 0.15789474
## [19,]         NA         NA         NA         NA         NA         NA
## [20,]         NA         NA         NA         NA         NA         NA
##            [,20]
##  [1,] 0.04761905
##  [2,] 0.43478261
##  [3,] 0.04761905
##  [4,] 0.24528302
##  [5,] 0.06451613
##  [6,] 0.08196721
##  [7,] 0.34693878
##  [8,] 0.04761905
##  [9,] 0.26923077
## [10,] 0.13793103
## [11,] 0.04761905
## [12,] 0.11864407
## [13,] 0.13793103
## [14,] 0.50000000
## [15,] 0.06451613
## [16,] 0.13793103
## [17,] 0.20000000
## [18,] 0.15789474
## [19,] 0.13793103
## [20,]         NA

Ok, lets’ try heatmap.

heatmap(p31m_matrix, Rowv = NA, Colv = NA, symm = TRUE, col = cm.colors(256))

Create a function to do this for each WP group.

plot_wp_heatmap <- function(df, group) {
  this_df <- df %>%
    dplyr::filter(., Group %in% group)
  
  this_matrix <- matrix(nrow = 20*length(group), ncol = 20)
  
  for (r in 1:190) {
    this_matrix[this_df$Exemplar.Row[r], this_df$Exemplar.Col[r]] <-
      this_df$Jaccard[r]
  }
  
  heatmap(this_matrix, 
          Rowv = NA, 
          Colv = NA, 
          symm = TRUE, 
          main = group,
          col= colorRampPalette(RColorBrewer::brewer.pal(3, "Oranges"))(3))
  
  legend(x="bottomright", 
         legend=c("low", "mid", "high"), 
         fill=colorRampPalette(RColorBrewer::brewer.pal(3, "Oranges"))(3))
}

Let’s try it.

plot_wp_heatmap(jaccard, "P1")

plot_wp_heatmap(jaccard, "P31M")

plot_wp_heatmap(jaccard, "P3M1")

plot_wp_heatmap(jaccard, "P6")

plot_wp_heatmap(jaccard, "P6M")

Or, there’s a tidyHeatmap package.

p31_heatmap <- 
    p31m %>% 
      tidyHeatmap::heatmap(Exemplar.Row, Exemplar.Col, Jaccard )
## tidyHeatmap says: (once per session) from release 1.2.3 the grouping labels have white background by default. To add color for one-ay grouping specify palette_grouping = list(c("red", "blue"))
p31_heatmap